# Session setup: print large numbers without scientific notation, start from
# an empty workspace, and emit a form-feed character (clears the console in
# RStudio).
options(scipen = 999)
rm(list = ls())
cat("\f")

library(dplyr)  #adattáblákkal való műveletekhez
library(ggplot2) #vizualizációkhoz
library(writexl) #xlsx-be kiírása táblázatoknak
library(extrafont) #betűtípusok betöltése
#font_import()
loadfonts(device = "win")

set.seed(123)

# Load the required input files.
ctable <- readRDS("data/ctable.RDS")
df <- readRDS("data/full_df3.RDS")

# Rename the colour-table key so that it matches the join column used below.
colnames(ctable)[colnames(ctable) == "id"] <- "ctable_nearest"

# Attach the colour name and the primary/secondary hex codes to every row
# of `df`, matched on `ctable_nearest`.
ctable_cols <- select(
  ctable,
  c(ctable_nearest, color, secondary_hexcode, primary_hexcode)
)
df <- left_join(df, ctable_cols, by = join_by(ctable_nearest))

names(df)

# Remove the cluster-related columns stored in the input file; they are
# recomputed for each k-means solution later in the script.
df <- select(df, -c(km, cluster_hex_code, wdist))

###############################################
# Elbow-plot data for the k-means clusterings #
###############################################

k <- 5:20

# Total within-cluster sum of squares of every saved k-means fit.
# The vector is preallocated and the loop runs over `k` itself, so the range
# of cluster counts is defined in exactly one place.
totwss <- numeric(length(k))

for (idx in seq_along(k)) {
  km_fit <- readRDS(paste0("data/km/km", k[idx], ".RDS"))

  totwss[idx] <- km_fit$tot.withinss

  print(idx) # progress: number of fits read so far
}

# Express the sums of squares in billions for the plot axis.
plot_df <- data.frame(k, totwss = totwss / 1000000000)

# Plot how the within-cluster variation changes as a function of k.

plot1 <- ggplot(plot_df, aes(k, totwss)) +
  geom_line() +
  geom_point() +
  theme_classic() +
  xlab("Number of clusters") +
  ylab("Total within-cluster sum of squares (billion)") +
  theme(text = element_text(family = "Times New Roman"))

plot1

# Save the plot both as an RDS object and as an image.

saveRDS(plot1, "plot/totwss.RDS")

# Pass the plot explicitly instead of relying on ggplot2's implicit
# "last plot", consistent with every other ggsave() call in this script.
ggsave("plot/totwss.jpg", plot = plot1, dpi = 300, width = 8, height = 5)

########################################
# All points in their original colours #
########################################

# `hex_code` already holds a colour per row, so the identity scale draws each
# point in exactly that colour; the legend is suppressed.
plot2 <- ggplot(data = df, mapping = aes(pc1, pc2, colour = hex_code)) +
  geom_point(size = 0.01) +
  guides(colour = "none") +
  theme_classic() +
  scale_colour_identity() +
  theme(text = element_text(family = "Times New Roman"))

saveRDS(object = plot2, file = "plot/every_own_color.RDS")

ggsave(
  filename = "plot/every_own_color.jpg",
  plot = plot2,
  dpi = 300,
  width = 8,
  height = 5
)

############################################################################################

# Same scatter plot restricted to the rows with pic == 108
# (presumably a single picture, used as a visual check -- confirm).
is_pic_108 <- df$pic == 108
test <- df[is_pic_108 & !is.na(is_pic_108), , drop = FALSE]

plot5 <- ggplot(data = test, mapping = aes(pc1, pc2, colour = hex_code)) +
  geom_point(size = 0.01) +
  guides(colour = "none") +
  theme_classic() +
  scale_colour_identity() +
  theme(text = element_text(family = "Times New Roman"))

ggsave(
  filename = "plot/test.jpg",
  plot = plot5,
  dpi = 300,
  width = 8,
  height = 5
)

############################################################################################


##########################################
# One plot for every k-means clustering  #
##########################################

# For each saved k-means fit: colour every point by the mean RGB of its
# cluster, save the plot (RDS + JPG) and write the cluster sizes/shares to
# an xlsx file.
for (i in 5:20) {
  km_fit <- readRDS(paste0("data/km/km", i, ".RDS"))

  # The labels are matched to the rows of `df` purely by position, so fail
  # loudly on a length mismatch instead of risking silent recycling.
  stopifnot(length(km_fit$cluster) == nrow(df))

  # Work on a per-iteration copy: `df` itself never carries the temporary
  # cluster columns, so a failed iteration cannot leave it in a state where
  # re-running the loop would produce duplicated (.x/.y) columns.
  df_k <- df
  df_k$km <- km_fit$cluster

  # Mean R, G and B of each cluster, converted to a hex colour string.
  color_summary <- df_k %>%
    group_by(km) %>%
    summarise(R = mean(R), G = mean(G), B = mean(B)) %>%
    mutate(
      cluster_hex_code = rgb(
        red = R, green = G, blue = B, maxColorValue = 255
      )
    )

  df_k <- left_join(
    df_k,
    select(color_summary, c(km, cluster_hex_code)),
    by = join_by(km)
  )

  # A loop-local plot name is used so that the earlier `plot2` object is not
  # overwritten and then deleted by this loop.
  cluster_plot <- ggplot(
    df_k,
    aes(x = pc1, y = pc2, color = cluster_hex_code)
  ) +
    geom_point(size = 0.01) +
    guides(color = "none") +
    theme_classic() +
    scale_color_identity() +
    theme(text = element_text(family = "Times New Roman"))

  saveRDS(cluster_plot, paste0("plot/km/every_km", i, ".RDS"))

  ggsave(
    plot = cluster_plot,
    paste0("plot/km/every_km", i, ".jpg"),
    dpi = 300, width = 8, height = 5
  )

  # Number of points in each cluster and their share of all points.
  cluster_shares <- df_k %>%
    group_by(km) %>%
    summarise(
      szám = n(),
      arány = n() / nrow(df_k)
    )

  write_xlsx(cluster_shares, paste0("data/k_tables/k", i, ".xlsx"))

  # Release the large per-iteration objects before the next fit is loaded.
  rm(cluster_plot, df_k)

  print(i)
}

##############################################
# All points coloured by the secondary color #
##############################################

plot6 <- ggplot(
  data = df,
  mapping = aes(pc1, pc2, colour = secondary_hexcode)
) +
  geom_point(size = 0.01) +
  guides(colour = "none") +
  theme_classic() +
  scale_colour_identity() +
  theme(text = element_text(family = "Times New Roman"))

ggsave(
  filename = "plot/every_secondary_color_ctable.jpg",
  plot = plot6,
  dpi = 300,
  width = 8,
  height = 5
)

saveRDS(object = plot6, file = "plot/every_secondary_color_ctable.RDS")

# Write the number of points per `ctable_nearest` value and their share of
# all points to xlsx; the grouping column is exported under the name "color".

summary <- df %>%
  group_by(ctable_nearest) %>%
  summarise(
    szám = n(),
    arány = n() / nrow(df)
  ) %>%
  rename(color = ctable_nearest)

write_xlsx(summary, "data/color_117_arany.xlsx")

############################################
# All points coloured by the primary color #
############################################

plot7 <- ggplot(
  data = df,
  mapping = aes(pc1, pc2, colour = primary_hexcode)
) +
  geom_point(size = 0.01) +
  guides(colour = "none") +
  theme_classic() +
  scale_colour_identity() +
  theme(text = element_text(family = "Times New Roman"))

ggsave(
  filename = "plot/every_primary_color_ctable.jpg",
  plot = plot7,
  dpi = 300,
  width = 8,
  height = 5
)

saveRDS(object = plot7, file = "plot/every_primary_color_ctable.RDS")

# Write the number of points per `color` value and their share of all points
# to xlsx.

summary <- df %>%
  group_by(color) %>%
  summarise(
    szám = n(),
    arány = n() / nrow(df)
  )

write_xlsx(summary, "data/color_10_arany.xlsx")

########################################
# The 14 clusters, labelled by number  #
########################################

km <- readRDS("data/km/km14.RDS")

names(df)

# The labels are matched to the rows of `df` purely by position, so fail
# loudly on a length mismatch instead of risking silent recycling.
stopifnot(length(km$cluster) == nrow(df))

# Make the section safe to re-run: a `cluster_hex_code` column that is already
# present would make the join below return cluster_hex_code.x / .y instead.
df <- select(df, -any_of("cluster_hex_code"))

df$km <- km$cluster

# Mean R, G and B of each cluster, converted to a hex colour string.
color_summary <- df %>%
  group_by(km) %>%
  summarise(R = mean(R), G = mean(G), B = mean(B)) %>%
  mutate(
    cluster_hex_code = rgb(
      red = R, green = G, blue = B, maxColorValue = 255
    )
  )

df <- left_join(
  df,
  select(color_summary, c(km, cluster_hex_code)),
  by = join_by(km)
)

# Label position of each cluster: the median of its points on both axes.
# (An earlier variant based on mean(min(...), max(...)) was removed: it passed
# the maximum as mean()'s `trim` argument and its result was overwritten by
# this median-based table anyway.)
pos_table <- df %>%
  group_by(km) %>%
  summarise(
    x = median(pc1),
    y = median(pc2)
  )

# annotate() accepts vectors, so all cluster numbers are added as one text
# layer instead of one layer per cluster.
plot <- ggplot(df, aes(x = pc1, y = pc2, color = cluster_hex_code)) +
  geom_point(size = 0.01) +
  guides(color = "none") +
  theme_classic() +
  scale_color_identity() +
  theme(text = element_text(family = "Times New Roman")) +
  annotate(
    "text",
    label = pos_table$km,
    x = pos_table$x, y = pos_table$y,
    size = 8, colour = "black"
  )

#saveRDS(plot, "plot/numbered_k14.RDS")

ggsave(plot = plot, "plot/numbered_k14.jpg", dpi = 300, width = 8, height = 5)

###############################
# Plot of the merged clusters #
###############################

km <- readRDS("data/km/km14.RDS")

# The labels are matched to the rows of `df` purely by position, so fail
# loudly on a length mismatch instead of risking silent recycling.
stopifnot(length(km$cluster) == nrow(df))

# `df` may still carry a `cluster_hex_code` column from an earlier step of the
# script. Without dropping it, the join below would return
# cluster_hex_code.x / cluster_hex_code.y and the plot would fail because
# `cluster_hex_code` no longer exists.
df <- select(df, -any_of("cluster_hex_code"))

# Cluster labels as strings, so merged groups can get a combined label.
df$km <- as.character(km$cluster)

df$km[df$km %in% c("5", "12", "10")] <- "5-12-10"

df$km[df$km %in% c("3", "14", "13")] <- "3-14-13"

df$km[df$km %in% c("6", "9", "4", "11")] <- "6-9-4-11"

df$km[df$km %in% c("7", "8")] <- "7-8"

unique(df$km)

# Mean R, G and B of each (merged) cluster, converted to a hex colour string.
color_summary <- df %>%
  group_by(km) %>%
  summarise(R = mean(R), G = mean(G), B = mean(B)) %>%
  mutate(
    cluster_hex_code = rgb(
      red = R, green = G, blue = B, maxColorValue = 255
    )
  )

df <- left_join(
  df,
  select(color_summary, c(km, cluster_hex_code)),
  by = join_by(km)
)

plot <- ggplot(df, aes(x = pc1, y = pc2, color = cluster_hex_code)) +
  geom_point(size = 0.01) +
  guides(color = "none") +
  theme_classic() +
  scale_color_identity() +
  theme(text = element_text(family = "Times New Roman"))

ggsave(plot = plot, "plot/összevontk14.jpg", dpi = 300, width = 8, height = 5)
